package com.ifyyf.es_study.utils;

import com.ifyyf.es_study.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * @Author if
 * @Description: Utility that fetches a JD.com search result page and parses it with Jsoup
 * @Date 2021-10-14 08:00 PM
 */
public class HtmlParseUtil {

    /** Search endpoint; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL_PREFIX = "https://search.jd.com/Search?enc=utf-8&keyword=";

    /** Timeout, in milliseconds, passed to Jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Id of the element that wraps the list of products on the result page. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /**
     * Fetches the JD search result page for the given keyword and extracts one
     * {@link Content} (title, image URL, price) per {@code <li>} found inside the
     * {@code J_goodsList} element.
     *
     * @param keys the search keyword; it is URL-encoded as UTF-8 before being sent
     * @return the parsed entries, or an empty list when the page does not contain
     *         the {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keys} is {@code null}
     * @throws IOException if the page cannot be fetched or the keyword cannot be encoded
     */
    public static List<Content> searchJd(String keys) throws IOException {
        if (keys == null) {
            throw new IllegalArgumentException("keys must not be null");
        }
        // Encode the keyword so that spaces, '&', '#', '+' and non-ASCII characters
        // (e.g. Chinese search terms) cannot corrupt the query string.
        // Example result: https://search.jd.com/Search?enc=utf-8&keyword=java
        String url = SEARCH_URL_PREFIX + URLEncoder.encode(keys, "UTF-8");

        // Jsoup returns a DOM-like Document that can be queried much like the
        // browser DOM from JavaScript.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> list = new ArrayList<>();
        Element jGoodsList = document.getElementById(GOODS_LIST_ID);
        if (jGoodsList == null) {
            // getElementById returns null when no such element exists; report
            // "no results" instead of failing with a NullPointerException.
            return list;
        }

        // Every <li> under the goods list is treated as one product entry.
        Elements items = jGoodsList.getElementsByTag("li");
        for (Element element : items) {
            // Image-heavy sites usually lazy-load images for faster responses, so the
            // image URL is read from "data-lazy-img" rather than "src".
            String imgSrc = element.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = element.getElementsByClass("p-price").eq(0).text();
            // NOTE(review): the class-name argument contains a space; confirm it still
            // matches the intended title elements on the current page markup.
            String title = element.getElementsByClass("p-name p-name-type-2").eq(0).text();
            list.add(new Content(title, imgSrc, price));
        }
        return list;
    }
}
